Main R script to determine: RNA copy number per cell RNA density number per cell Transcriptional properties Based on the output .csv table obtained with the Python script
Load libraries
library(ggplot2)
library (ggbeeswarm)
require(plyr) # Am I using this in the end?
## Loading required package: plyr
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble 1.4.2 ✔ purrr 0.2.5
## ✔ tidyr 0.8.1 ✔ dplyr 0.7.5
## ✔ readr 1.1.1 ✔ stringr 1.3.1
## ✔ tibble 1.4.2 ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::arrange() masks plyr::arrange()
## ✖ purrr::compact() masks plyr::compact()
## ✖ dplyr::count() masks plyr::count()
## ✖ dplyr::failwith() masks plyr::failwith()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::id() masks plyr::id()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::mutate() masks plyr::mutate()
## ✖ dplyr::rename() masks plyr::rename()
## ✖ dplyr::summarise() masks plyr::summarise()
## ✖ dplyr::summarize() masks plyr::summarize()
# library(tibble) #for add_column etc
library(knitr)
library(rmarkdown)
Read data and calculate RNA copy number and concentration (== density)
Lorena Garcia-Perez, Developmental Dynamics lab, The Francis Crick Institute, UK lorena.garcia-perez@crick.ac.uk
Another option is to execute the following: args = c(“Diff26_c2_data.csv”, “26_c2_copy_den_plots.pdf”, “26_c2_TX_plots.pdf”, “26_c2_heter_plots.pdf”, “26_c2_copyN.csv”, “26_c2_TX.csv”, “26_c2_heter.csv”) # #The first argument is the data to be read, #arguments 2-4 are the names of the pdf files to be created containing the plots, #arguments 5-7 are the names of the csv files to be created cotaining the extracted data
wd <- getwd()
setwd("..")
parent <- getwd()
file <- read.csv("./input/Diff26_c2_data.csv")
setwd(wd)
# To remove any row where the area = 0 due to clicking on a point during cell segmentation
file <- file[file$cell.area != 0, ]
kable(head(file))
| channel | day | image | cell.id | cell.area | spot.id | x | y | area | max.int | mean.int |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 4 | 10 | 148 | 3899 | 1 | 226.0 | 258.0 | 1 | 1947 | 1947 |
| 0 | 4 | 10 | 148 | 3899 | 2 | 223.5 | 258.0 | 2 | 1979 | 1968 |
| 0 | 4 | 10 | 148 | 3899 | 3 | 224.0 | 256.0 | 1 | 2030 | 2030 |
| 0 | 4 | 10 | 148 | 3899 | 4 | 234.0 | 255.3 | 3 | 2421 | 2212 |
| 0 | 4 | 10 | 148 | 3899 | 5 | 228.0 | 255.0 | 1 | 2049 | 2049 |
| 0 | 4 | 10 | 148 | 3899 | 6 | 220.0 | 254.0 | 1 | 2369 | 2369 |
# Colors
cbbPalette <- c("#000000", "#009E73", "#e79f00", "#9ad0f3", "#0072B2", "#D55E00",
"#CC79A7", "#F0E442")
days <- c(4,5,6)
days.names <- c("day 4", "day 5", "day 6")
channels <- c(0, 1, 2)
genes <- c("Irx3","Sox2","Olig2")
progenitors.gene <- genes[2]
# For spot selection as TX spot
MaxInt.alpha <- 2 # This value should lay between 1.5 to 3
area.alpha <- 2 # This value should lay between 1.5 to 3
# With 60X objective in BX61 microscope and previous ANDOR camera:
# Pixel size = 0.1107x0.1107 µm^2 = 0.01225 µm^2
pixel.size <- 0.01225
ploidy <- 3
#Correction factor that takes into account
#probe localization over gene:
#Irx3 reverse starnd transcribed, Sox2 and Olig2
correction.factor <- c(0.34, 0.5, 0.34)
genes.length <- c(4822, 3311, 4400) # channel 0, 1, 2 (Irx3, Sox2, Olig2)
vPol.chosen = 2040 # bp/min, as in Bahar Halpern
copy.density.cell <-
file %>%
mutate(cell.area = cell.area * pixel.size) %>% # Convert cell area in pixels to cell area in µm^2
group_by(channel, day, image, cell.id, cell.area) %>%
summarise(copy = n()) %>% # RNA copy number per cell
mutate(density = round(copy / cell.area, 3)) # RNA densities in terms of RNA molecules per µm^2
kable(head(copy.density.cell))
| channel | day | image | cell.id | cell.area | copy | density |
|---|---|---|---|---|---|---|
| 0 | 4 | 3 | 36 | 24.90 | 7 | 0.281 |
| 0 | 4 | 3 | 37 | 30.26 | 15 | 0.496 |
| 0 | 4 | 3 | 38 | 19.33 | 14 | 0.724 |
| 0 | 4 | 3 | 39 | 37.71 | 42 | 1.114 |
| 0 | 4 | 3 | 40 | 39.62 | 32 | 0.808 |
| 0 | 4 | 3 | 41 | 32.89 | 13 | 0.395 |
To determine which spots can be considered transcription spots, we need to know the area and maximum intensity values for all the spots we normalize according to each channel, day and image
TX.threholds <-
file %>%
group_by(channel, day, image) %>% # Choose threholds specifically for each image and channel
summarise(MaxInt.median = median(max.int), MaxInt.mad = mad(max.int),
area.median = median(area), area.mad = mad(area)) %>%
mutate(MaxInt.thres = MaxInt.median + 3 * MaxInt.alpha * MaxInt.mad,
area.thres = area.median + 3 * area.alpha * area.mad)
kable(head(TX.threholds))
| channel | day | image | MaxInt.median | MaxInt.mad | area.median | area.mad | MaxInt.thres | area.thres |
|---|---|---|---|---|---|---|---|---|
| 0 | 4 | 3 | 1894 | 282.4 | 2 | 1.483 | 3588 | 10.9 |
| 0 | 4 | 4 | 1547 | 198.7 | 2 | 1.483 | 2739 | 10.9 |
| 0 | 4 | 5 | 1755 | 296.5 | 2 | 1.483 | 3534 | 10.9 |
| 0 | 4 | 6 | 1727 | 209.0 | 2 | 1.483 | 2981 | 10.9 |
| 0 | 4 | 7 | 1454 | 222.4 | 2 | 1.483 | 2788 | 10.9 |
| 0 | 4 | 9 | 1628 | 247.6 | 2 | 1.483 | 3114 | 10.9 |
TX.number.cell <-
file %>%
group_by(channel, day, image) %>%
left_join(TX.threholds) %>%
mutate(is.TX.spot = max.int > MaxInt.thres & area > area.thres) %>%
group_by(channel, day, image, cell.id) %>%
summarise(TX.number = sum(is.TX.spot))
## Joining, by = c("channel", "day", "image")
kable(head(TX.number.cell))
| channel | day | image | cell.id | TX.number |
|---|---|---|---|---|
| 0 | 4 | 3 | 36 | 0 |
| 0 | 4 | 3 | 37 | 0 |
| 0 | 4 | 3 | 38 | 0 |
| 0 | 4 | 3 | 39 | 0 |
| 0 | 4 | 3 | 40 | 0 |
| 0 | 4 | 3 | 41 | 0 |
TX.mol <-
file %>%
group_by(channel, day, image) %>%
left_join(TX.threholds) %>%
mutate(is.TX.spot = max.int > MaxInt.thres & area > area.thres) %>%
mutate(meanInt.median = median(mean.int)) %>%
filter(is.TX.spot == TRUE) %>%
mutate(probe.loc = channel) %>%
mutate(probe.loc = as.numeric(recode(probe.loc,
"0"=correction.factor[1],
"1"=correction.factor[2],
"2"=correction.factor[3]))) %>%
mutate(TX.mol = mean.int * area / (probe.loc * meanInt.median * area.median))
## Joining, by = c("channel", "day", "image")
TX.mol$channel <- as.factor(TX.mol$channel)
levels(TX.mol$channel) <- genes
kable(head(TX.mol))
| channel | day | image | cell.id | cell.area | spot.id | x | y | area | max.int | mean.int | MaxInt.median | MaxInt.mad | area.median | area.mad | MaxInt.thres | area.thres | is.TX.spot | meanInt.median | probe.loc | TX.mol |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Irx3 | 5 | 0 | 1287 | 2372 | 28 | 195.2 | 316.4 | 5 | 4837 | 3606 | 2168 | 329.9 | 1 | 0 | 4147 | 1 | TRUE | 2145 | 0.34 | 24.72 |
| Irx3 | 5 | 0 | 1314 | 1584 | 8 | 258.0 | 108.2 | 4 | 4412 | 3444 | 2168 | 329.9 | 1 | 0 | 4147 | 1 | TRUE | 2145 | 0.34 | 18.89 |
| Irx3 | 5 | 0 | 1316 | 1436 | 25 | 140.5 | 107.5 | 4 | 5530 | 5109 | 2168 | 329.9 | 1 | 0 | 4147 | 1 | TRUE | 2145 | 0.34 | 28.02 |
| Irx3 | 5 | 1 | 1338 | 1401 | 11 | 258.5 | 305.5 | 4 | 3615 | 3212 | 1740 | 266.9 | 1 | 0 | 3341 | 1 | TRUE | 1710 | 0.34 | 22.09 |
| Irx3 | 5 | 1 | 1327 | 2463 | 19 | 137.3 | 327.3 | 3 | 3410 | 2680 | 1740 | 266.9 | 1 | 0 | 3341 | 1 | TRUE | 1710 | 0.34 | 13.82 |
| Irx3 | 5 | 2 | 1415 | 1920 | 25 | 383.2 | 101.6 | 5 | 4024 | 3125 | 1826 | 344.0 | 1 | 0 | 3890 | 1 | TRUE | 1810 | 0.34 | 25.40 |
single.cell.data <-
inner_join(copy.density.cell, TX.number.cell) %>%
ungroup()
## Joining, by = c("channel", "day", "image", "cell.id")
single.cell.data$channel <- as.factor(single.cell.data$channel)
levels(single.cell.data$channel) <- genes
kable(head(single.cell.data))
| channel | day | image | cell.id | cell.area | copy | density | TX.number |
|---|---|---|---|---|---|---|---|
| Irx3 | 4 | 3 | 36 | 24.90 | 7 | 0.281 | 0 |
| Irx3 | 4 | 3 | 37 | 30.26 | 15 | 0.496 | 0 |
| Irx3 | 4 | 3 | 38 | 19.33 | 14 | 0.724 | 0 |
| Irx3 | 4 | 3 | 39 | 37.71 | 42 | 1.114 | 0 |
| Irx3 | 4 | 3 | 40 | 39.62 | 32 | 0.808 | 0 |
| Irx3 | 4 | 3 | 41 | 32.89 | 13 | 0.395 | 0 |
single.cell.expressing.data <-
single.cell.data %>%
filter(density >= 1)
kable(head(single.cell.expressing.data))
| channel | day | image | cell.id | cell.area | copy | density | TX.number |
|---|---|---|---|---|---|---|---|
| Irx3 | 4 | 3 | 39 | 37.71 | 42 | 1.114 | 0 |
| Irx3 | 4 | 3 | 42 | 26.88 | 46 | 1.712 | 0 |
| Irx3 | 4 | 3 | 46 | 27.80 | 58 | 2.087 | 0 |
| Irx3 | 4 | 3 | 49 | 25.00 | 25 | 1.000 | 0 |
| Irx3 | 4 | 3 | 51 | 24.92 | 33 | 1.324 | 0 |
| Irx3 | 4 | 3 | 82 | 29.67 | 34 | 1.146 | 0 |
single.cell.sox2.expressing.data <- # This could be combined within single.cell.progenitors.data with filter_if() ?
single.cell.data %>%
filter(channel == progenitors.gene) %>%
filter(density >= 1) # Require sox2 density >= 1 molecule / µm^2
kable(head(single.cell.sox2.expressing.data))
| channel | day | image | cell.id | cell.area | copy | density | TX.number |
|---|---|---|---|---|---|---|---|
| Sox2 | 4 | 3 | 36 | 24.90 | 61 | 2.449 | 0 |
| Sox2 | 4 | 3 | 37 | 30.26 | 76 | 2.512 | 0 |
| Sox2 | 4 | 3 | 38 | 19.33 | 48 | 2.483 | 0 |
| Sox2 | 4 | 3 | 39 | 37.71 | 81 | 2.148 | 0 |
| Sox2 | 4 | 3 | 40 | 39.62 | 94 | 2.373 | 0 |
| Sox2 | 4 | 3 | 41 | 32.89 | 66 | 2.007 | 0 |
single.cell.progenitors.data <- # Select progenitors according to sox2 density >= 1 molecule / µm^2
semi_join(single.cell.data, single.cell.sox2.expressing.data,
by = c("day", "image", "cell.id", "cell.area"))
kable(head(single.cell.progenitors.data))
| channel | day | image | cell.id | cell.area | copy | density | TX.number |
|---|---|---|---|---|---|---|---|
| Irx3 | 4 | 3 | 36 | 24.90 | 7 | 0.281 | 0 |
| Irx3 | 4 | 3 | 37 | 30.26 | 15 | 0.496 | 0 |
| Irx3 | 4 | 3 | 38 | 19.33 | 14 | 0.724 | 0 |
| Irx3 | 4 | 3 | 39 | 37.71 | 42 | 1.114 | 0 |
| Irx3 | 4 | 3 | 40 | 39.62 | 32 | 0.808 | 0 |
| Irx3 | 4 | 3 | 41 | 32.89 | 13 | 0.395 | 0 |
# This is the more complete, non subseted, dataset
# But, if a cell has two TX spots, it will appear on two rows and so on
withTX.single.cell.data <-
TX.mol %>%
select (channel, day, image, cell.id, spot.id, TX.mol) %>%
full_join(single.cell.data) %>%
arrange(channel, day, image, cell.id) %>%
replace_na(list(TX.mol = 0))
## Joining, by = c("channel", "day", "image", "cell.id")
kable(head(withTX.single.cell.data))
| channel | day | image | cell.id | spot.id | TX.mol | cell.area | copy | density | TX.number |
|---|---|---|---|---|---|---|---|---|---|
| Irx3 | 4 | 3 | 36 | NA | 0 | 24.90 | 7 | 0.281 | 0 |
| Irx3 | 4 | 3 | 37 | NA | 0 | 30.26 | 15 | 0.496 | 0 |
| Irx3 | 4 | 3 | 38 | NA | 0 | 19.33 | 14 | 0.724 | 0 |
| Irx3 | 4 | 3 | 39 | NA | 0 | 37.71 | 42 | 1.114 | 0 |
| Irx3 | 4 | 3 | 40 | NA | 0 | 39.62 | 32 | 0.808 | 0 |
| Irx3 | 4 | 3 | 41 | NA | 0 | 32.89 | 13 | 0.395 | 0 |
two.alleles.data <-
TX.mol %>%
select(channel, day, image, cell.id, TX.mol) %>%
group_by(channel, day, image, cell.id) %>%
mutate(freq = n()) %>%
ungroup() %>%
filter(freq == 2) %>%
select(-freq) %>%
mutate(TX.id = rep(c(1,2), length.out = length(.$channel))) %>% # Create a column with appropiate numebr of rows
spread(TX.id, TX.mol) %>%
rename(TX.id.1 = "1", TX.id.2 = "2")
two.alleles.data$channel <- as.factor(two.alleles.data$channel )
levels(two.alleles.data$channel) <- genes
kable(head(two.alleles.data))
| channel | day | image | cell.id | TX.id.1 | TX.id.2 |
|---|---|---|---|---|---|
| Irx3 | 5 | 8 | 1171 | 32.79 | 23.44 |
| Irx3 | 5 | 9 | 1207 | 28.36 | 22.51 |
| Irx3 | 6 | 9 | 2083 | 29.24 | 26.39 |
| Sox2 | 4 | 5 | 136 | 22.51 | 27.95 |
| Sox2 | 4 | 5 | 146 | 17.37 | 31.55 |
| Sox2 | 4 | 5 | 152 | 18.81 | 11.57 |
single.cell.progenitors.data is used, but results with single.cell.data are quite similar
Lorena Garcia-Perez, Developmental Dynamics lab, The Francis Crick Institute, UK lorena.garcia-perez@crick.ac.uk
Trancription probability
prob.TX.fn <- function(data, day.chosen, gene.chosen, ploidy) {
# day.chosen and gene.schosen must be input as character (for example, "4" and "Olig2")
# ploidy must be input as numeric
day.gene.TX.subset <-
pull(filter(data, day == day.chosen & channel == gene.chosen), TX.number)
return(sum(day.gene.TX.subset)/(ploidy*length(day.gene.TX.subset)))
}
Mean RNA copy number
mean.copy.fn <- function(data, day.chosen, gene.chosen) {
# day.chosen and gene.schosen must be input as character (for example, "4" and "Olig2")
day.gene.copy.subset <-
pull(filter(data, day == day.chosen & channel == gene.chosen), copy)
return(mean(day.gene.copy.subset))
}
Mean number of RNA pol = mean number of molecules at TX spot
mean.nPol.fn <- function(data, day.chosen, gene.chosen) {
# day.chosen and gene.schosen must be input as character (for example, "4" and "Olig2")
day.gene.nPol.subset <-
pull(filter(data, day == day.chosen & channel == gene.chosen), TX.mol)
return(mean(day.gene.nPol.subset))
}
Chose white or dark themes as part of the plotting funtion inputs
basicTheme <-list(
theme(plot.title = element_text(size = 15, hjust = 0.5, face = "bold",
margin = margin(t = 0, r = 0, b = 30, l = 0)),
axis.text = element_text(size = 15),
axis.title = element_text(size = 15),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.line = element_line(size = 1.5),
axis.ticks = element_line(size = 1.5),
axis.ticks.length = unit(0.15,"cm"),
strip.text.x = element_text(size = 15, face = "bold"),
panel.spacing = unit(1, "lines"),
panel.background = element_rect(fill = "transparent"), # bg of the panel
plot.background = element_rect(fill = "transparent", colour = NA), # bg of the plot
legend.background = element_rect(fill = "transparent"), # get rid of legend bg
legend.box.background = element_rect(fill = "transparent")) # get rid of legend panel bg
)
whiteTheme <- list(theme())
darkTheme <- list(
theme(axis.text = element_text(colour = "lightgrey"),
axis.title = element_text(colour = "lightgrey"),
axis.line = element_line(colour = "lightgrey"),
axis.ticks = element_line(colour = "lightgrey"),
strip.text.x = element_text(colour = "lightgrey"))
)
RNA copy number and RNA concentration (== density) plots
copy.box.plot <-
function(data, Theme = whiteTheme) {
data$day <- as.factor(data$day)
levels(data$day) <- days.names
plots <-
ggplot(data, aes(day, copy, group=day)) +
geom_boxplot(outlier.shape = NA, fill = NA, colour = cbbPalette[2], lwd = 1) +
geom_quasirandom(shape = 21, fill = "lightgrey", colour = "black", size = 0.75,
alpha = 0.4) +
ylab("RNAs per cell") +
xlab("") +
ylim(0, max(data$copy)) +
labs (title = "RNA copy number") +
theme_classic() +
basicTheme +
Theme +
theme(plot.title=element_text(colour = cbbPalette[2]),
strip.background = element_rect(fill = cbbPalette[2], colour = cbbPalette[2])) +
facet_wrap(~ channel)
ggsave(paste("./figures/copy_box_plot_", deparse(substitute(Theme)), ".pdf", sep=""),
plots, width = 10, height = 5)
print(plots)
}
concentration.box.plot <-
function(data, Theme = whiteTheme) {
data$day <- as.factor(data$day)
levels(data$day) <- days.names
plots <-
ggplot(data, aes(day, density, group=day)) +
geom_boxplot(outlier.shape = NA, fill = NA, colour = cbbPalette[3], lwd = 1) +
geom_quasirandom(shape = 21, fill = "lightgrey", colour = "black", size = 0.75,
alpha = 0.4) +
ylab(bquote("RNAs/" ~ mu*m^2)) +
xlab("") +
ylim(0, max(data$density)) +
labs (title = "RNA concentration") +
theme_classic() +
basicTheme +
Theme +
theme(plot.title=element_text(colour = cbbPalette[3]),
strip.background = element_rect(fill = cbbPalette[3], colour = cbbPalette[3])) +
facet_wrap(~ channel)
ggsave(paste("./figures/concentration_box_plot_", deparse(substitute(Theme)), ".pdf", sep=""),
plots, width = 10, height = 5)
print(plots)
}
channel.1 and channel.2 must be supplied as strings data must have channel as factor
copy.pairwise.scatter.plot <- # Could try to make axis limit specific for the pair of genes being plot each time...
function(data, channel.1, channel.2, Theme = whiteTheme) {
data.subset <-
data %>%
select(channel, day, image, cell.id, cell.area, copy) %>%
spread(channel, copy)
plots <-
ggplot(data.subset, aes_string(channel.2, channel.1)) +
geom_point(shape = 21, fill = "lightgrey", colour = "black",
size = 1.5, alpha = 0.4) +
geom_density_2d(size = 0.5, colour = cbbPalette[2], bins = 10) +
labs (title = "RNA copy number") +
scale_x_continuous(name = bquote(.(channel.2) ~ " RNAs per cell"),
breaks = seq(0, max(data$copy), 50),
limits = c(0, max(data$copy))) +
scale_y_continuous(name = bquote(.(channel.1) ~ " RNAs per cell"),
breaks = seq(0, max(data$copy), 50),
limits = c(0, max(data$copy))) +
theme_classic() +
basicTheme +
Theme +
theme(plot.title=element_text(colour = cbbPalette[2]),
strip.background = element_rect(fill = cbbPalette[2], colour = cbbPalette[2])) +
facet_wrap(~ day)
ggsave(paste("./figures/copy_pairwise_scatter_plot_", deparse(substitute(Theme)),"_",
channel.2,"_",channel.1,".pdf", sep=""),
plots, width = 10, height = 5)
print(plots)
}
concentration.pairwise.scatter.plot <- # Could try to make axis limit specific for the pair of genes being plot each time...
function(data, channel.1, channel.2, Theme = whiteTheme) {
data.subset <-
data %>%
select(channel, day, image, cell.id, cell.area, density) %>%
spread(channel, density)
plots <-
ggplot(data.subset, aes_string(channel.2, channel.1)) +
geom_point(shape = 21, fill = "lightgrey", colour = "black",
size = 1.5, alpha = 0.4) +
geom_density_2d(size = 0.5, colour = cbbPalette[3], bins = 10) +
labs (title = "RNA cconcentration") +
scale_x_continuous(name = bquote(.(channel.2) ~ " RNAs/" ~ mu*m^2),
breaks = seq(0, max(data$density), 2),
limits = c(0, max(data$density))) +
scale_y_continuous(name = bquote(.(channel.1) ~ " RNAs/" ~ mu*m^2),
breaks = seq(0, max(data$density), 2),
limits = c(0, max(data$density))) +
theme_classic() +
basicTheme +
Theme +
theme(plot.title=element_text(colour = cbbPalette[3]),
strip.background = element_rect(fill = cbbPalette[3], colour = cbbPalette[3])) +
facet_wrap(~ day)
ggsave(paste("./figures/concentration_pairwise_scatter_plot_", deparse(substitute(Theme)),"_",
channel.2,"_",channel.1,".pdf", sep=""),
plots, width = 10, height = 5)
print(plots)
}
mean.properties.data <-
tibble(day = character(), gene = character(),
prob.TX = as.numeric(), mean.copy = as.numeric(), mean.nPol = as.numeric())
for (d in days) {
for (g in genes) {
mean.properties.data <-
add_row(mean.properties.data,
day = d, gene = g,
prob.TX = round(prob.TX.fn(single.cell.progenitors.data, d, g, ploidy),3),
mean.copy = round(mean.copy.fn(single.cell.progenitors.data, d, g),3),
mean.nPol = round(mean.nPol.fn(TX.mol, d, g),3))
}
}
mean.properties.data <- mean.properties.data %>% replace_na(list(mean.nPol = 0))
mean.properties.data <-
mean.properties.data %>%
mutate(vPol = vPol.chosen, gene.length = rep(genes.length,3), ploidy = ploidy) %>%
mutate(TX.rate = mean.nPol * vPol / gene.length) %>%
mutate(deg.rate = prob.TX * TX.rate * ploidy / mean.copy) %>%
mutate(half.life = log(2)/deg.rate) #in min
kable(head(mean.properties.data))
| day | gene | prob.TX | mean.copy | mean.nPol | vPol | gene.length | ploidy | TX.rate | deg.rate | half.life |
|---|---|---|---|---|---|---|---|---|---|---|
| 4 | Irx3 | 0.000 | 25.166 | 0.00 | 2040 | 4822 | 3 | 0.000 | 0.0000 | Inf |
| 4 | Sox2 | 0.041 | 58.186 | 18.89 | 2040 | 3311 | 3 | 11.639 | 0.0246 | 28.172 |
| 4 | Olig2 | 0.008 | 7.899 | 38.70 | 2040 | 4400 | 3 | 17.941 | 0.0545 | 12.716 |
| 5 | Irx3 | 0.015 | 25.281 | 23.27 | 2040 | 4822 | 3 | 9.846 | 0.0175 | 39.549 |
| 5 | Sox2 | 0.040 | 38.227 | 16.76 | 2040 | 3311 | 3 | 10.325 | 0.0324 | 21.387 |
| 5 | Olig2 | 0.020 | 12.215 | 33.23 | 2040 | 4400 | 3 | 15.404 | 0.0757 | 9.161 |
prob.TX.pairs.data <-
mean.properties.data %>%
select(day, gene, prob.TX) %>%
rename(gene.1 = gene, prob.TX.1 = prob.TX) %>%
mutate(gene.2 = gene.1) %>%
expand(nesting(day, gene.1, prob.TX.1), gene.2) %>%
group_by(day) %>%
mutate(prob.TX.2 = rep(unique(prob.TX.1), times = 3)) %>%
filter(gene.1 < gene.2) # To get unique combinations
kable(head(prob.TX.pairs.data))
| day | gene.1 | prob.TX.1 | gene.2 | prob.TX.2 |
|---|---|---|---|---|
| 4 | Irx3 | 0.000 | Olig2 | 0.008 |
| 4 | Irx3 | 0.000 | Sox2 | 0.041 |
| 4 | Olig2 | 0.008 | Sox2 | 0.041 |
| 5 | Irx3 | 0.015 | Olig2 | 0.020 |
| 5 | Irx3 | 0.015 | Sox2 | 0.040 |
| 5 | Olig2 | 0.020 | Sox2 | 0.040 |
for (d in days) { for (g in genes) { prob.TX.pairs.data\(prob.TX.both <- add_row(prob.TX.pairs.data\)prob.TX.both, prob.TX.pairs.fn())
prob.TX.pairs.fn <- function(data, day.chosen, gene.chosen.1, gene.chosen.2, ploidy) {
# day.chosen and gene.schosen must be input as character (for example, "4" and "Olig2")
# ploidy must be input as numeric
day.gene.TX.subset <-
filter(data, day == day.chosen & (channel == gene.chosen.1 | channel == gene.chosen.2))
dual.TX <-
day.gene.TX.subset %>%
select(channel, image, day, cell.id, TX.number) %>%
filter (TX.number != 0) %>%
group_by(day, image, cell.id) %>%
mutate(freq = n()) %>%
ungroup() %>%
filter(freq == 2) %>%
select(-freq) %>%
spread(channel, TX.number) %>%
return(nrow(dual.TX)/(nrow(day.gene.TX.subset))) # Here I do not make use of the ploidy concept...
}
two.alleles.data\(channel <- as.factor(two.alleles.data\)channel ) levels(two.alleles.data$channel) <- genes
TX.probability.all[,6] <- TX.probability.all\(TX.prob.pair / (TX.probability.all\)TX.prob.oneC * TX.probability.all$TX.prob.otherC) colnames(TX.probability.all)[6] <- c(“extrinsic.noise”)
genes.pairwise <-
tibble(genes) %>%
rename(gene.1 = genes) %>%
mutate(gene.2 = gene.1) %>%
expand(gene.1, gene.2) %>%
filter(gene.1 < gene.2) # To get unique combinations
setwd(parent)
copy.box.plot(single.cell.progenitors.data)
copy.box.plot(single.cell.progenitors.data, darkTheme)
concentration.box.plot(single.cell.progenitors.data)
concentration.box.plot(single.cell.progenitors.data, darkTheme)
for (g in c(1,2,3)) {
copy.pairwise.scatter.plot(
single.cell.progenitors.data,
genes.pairwise$gene.1[g], genes.pairwise$gene.2[g],
whiteTheme)
copy.pairwise.scatter.plot(
single.cell.progenitors.data,
genes.pairwise$gene.1[g], genes.pairwise$gene.2[g],
darkTheme)
concentration.pairwise.scatter.plot(
single.cell.progenitors.data,
genes.pairwise$gene.1[g], genes.pairwise$gene.2[g],
whiteTheme)
concentration.pairwise.scatter.plot(
single.cell.progenitors.data,
genes.pairwise$gene.1[g], genes.pairwise$gene.2[g],
darkTheme)
}
source(“26_cell_area.R”)
setwd(wd)
files <- list.files()
index <- grep("html", files)
setwd(parent)
file.rename(file.path("R", files[index]),
file.path("docs", files[index]))
## Warning in file.rename(file.path("R", files[index]), file.path("docs",
## files[index])): cannot rename file 'R/func.nb.html' to 'docs/func.nb.html',
## reason 'No such file or directory'
## Warning in file.rename(file.path("R", files[index]), file.path("docs",
## files[index])): cannot rename file 'R/load_clean.nb.html' to 'docs/
## load_clean.nb.html', reason 'No such file or directory'
## [1] FALSE FALSE
file\(channel <- as.factor(file\)channel) levels(file$channel) <- genes
rect <- data.frame(xmin=threshold.area, xmax=Inf, ymin=threshold.MaxInt, ymax=Inf, channel=c(0,1,2)) rect\(channel <- as.factor(rect\)channel) levels(rect$channel) <- genes
source(“26_spot_area_maxInt.R”)
source(“26_TXnumber_vs.R”)
source(“26_barTXprob.R”)
source(“26_barTXprobExtrinsic.R”)
nested <- total %>% group_by(day, image, cell.id, cell.area, channel, copy, density, TX.number) %>% nest() %>% group_by(day, image, cell.id, cell.area, channel) %>% nest() %>% group_by(day, image, cell.id, cell.area) %>% nest() %>% group_by(day) %>% nest()
myjson <- toJSON(nested, pretty = TRUE, auto_unbox = TRUE)
write(myjson, “single-RNAs_TX-RNAs_cell.json”)
library(rjson) json_data <- fromJSON(file=“single-RNAs_TX-RNAs_cell.json”)
source(“26_boxTXmolecules.R”)
source(“26_TX2alleles.R”)
plots <- c(hist.spots.area, hist.spots.MaxInt, p.spots.AreaVsMaxInt, p.TX.numberVsCopy, p.TX.numberVsDensity, p.TX.numberVsCellArea, bar.TX.prob, bar.TX.prob.extrinsic, box.TX.molecules, p.TX.2alleles.molecules.final)
var(spots.cell\(copy.C1) / mean (spots.cell\)copy.C1)
FF.expressing <- data.frame(FF.C0=as.numeric(), FF.C1=as.numeric(), FF.C2=as.numeric()) i <- 1 for (d in days) { temp <- spots.cell.expressing[[1]] temp2 <- spots.cell.expressing[[2]] temp3 <- spots.cell.expressing[[3]] FF.expressing[i,] <- c(var(temp\(copy.C0[temp\)day==d]) / mean(temp\(copy.C0[temp\)day==d]), var(temp2\(copy.C1[temp2\)day==d]) / mean(temp2\(copy.C1[temp2\)day==d]), var(temp3\(copy.C2[temp3\)day==d]) / mean(temp3\(copy.C2[temp3\)day==d])) i <- i + 1 }
write.csv(spots.cell, “spots_cell.csv”, row.names=FALSE) write.csv(spots.cell.2, “spots_cell2.csv”, row.names=FALSE)
write.csv(spots.cell.pro, “spots_cell_pro.csv”, row.names=FALSE) write.csv(spots.cell.2.pro, “spots_cell2_pro.csv”, row.names=FALSE)
write.csv(TX.molecules, “TX_molecules.csv”, row.names=FALSE)